package cn.xo68.boot.webgather.resolve.htmlunit;

import cn.xo68.boot.webgather.resolve.ContentResolve;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
 * Gather executor: loads a page with an HtmlUnit {@link WebClient} obtained from a
 * {@link WebClientFactory}, parses the rendered markup with Jsoup and hands the
 * resulting {@link Document} to a {@link ContentResolve}.
 *
 * @author wuxie
 * @date 2018-7-16
 */
public class GatherExecutor{

    private static final Logger logger=LoggerFactory.getLogger(GatherExecutor.class);

    /** Factory the WebClient instances are taken from and handed back to. */
    private final WebClientFactory webClientFactory;

    /**
     * Creates an executor backed by the given factory.
     *
     * @param webClientFactory factory used to obtain and release WebClient instances
     */
    public GatherExecutor(WebClientFactory webClientFactory) {
        this.webClientFactory = webClientFactory;
    }

    /**
     * Loads the page described by {@code webRequest} and stores the outcome in
     * {@code contentResolve}.
     *
     * <p>On success the page's XML serialization, parsed by Jsoup, is passed to
     * {@link ContentResolve#setDocument}. If the WebClient cannot be obtained, or the
     * page load throws an {@link IOException}, the failure is logged and reported via
     * {@link ContentResolve#setError} and the method returns normally. Unchecked
     * exceptions raised while loading or processing the page propagate to the caller.
     *
     * <p>Once a WebClient has been obtained it is always handed back to the factory
     * through {@code releaseExecutor}, regardless of how this method exits.
     *
     * @param webRequest     request describing the page to load
     * @param contentResolve receiver of either the parsed document or an error message
     */
    public void execute(WebRequest webRequest, ContentResolve contentResolve){

        WebClient webClient;
        try {
            webClient = webClientFactory.getObject();
        } catch (Exception e) {
            logger.error("从工厂获得WebClient异常", e);
            contentResolve.setError("从工厂获得WebClient异常");
            return;
        }

        // From here on the client is held by this call; the finally block guarantees
        // it is released on the error return and on unexpected runtime exceptions too.
        try {
            HtmlPage htmlPage;
            try {
                htmlPage = webClient.getPage(webRequest);
            } catch (IOException e) {
                logger.error("加载页面信息异常", e);
                contentResolve.setError("加载页面信息异常");
                return;
            }

            // Give background JavaScript time to finish updating the DOM before
            // the page is serialized.
            webClient.waitForBackgroundJavaScript(
                    webClientFactory.getWebGatherProperties().getWaitForBackgroundJavaScriptMillis());

            // Parse the rendered page with Jsoup and publish the result.
            Document document = Jsoup.parse(htmlPage.asXml());
            contentResolve.setDocument(document);
        } finally {
            webClientFactory.releaseExecutor(webClient);
        }
    }

}
